
{install.packages("survey")
install.packages("srvyr")
install.packages("ggplot2")
install.packages("dplyr")
install.packages("data.table")
install.packages("questionr")
install.packages("ggthemes")
install.packages("ggpubr")
install.packages("moments")
install.packages("ggrepel")
}

{library(data.table)
library(survey)
library(srvyr)
library(dplyr)
library(data.table)
library(questionr)
library(ggthemes)
library(ggplot2)
library(ggpubr)
library(moments)
library(ggrepel)
}

### Loading data from csv

PBS_data <- read.csv2("PBS_zbiór.csv", sep = ",")

PBS_data <- as.data.table(PBS_data)

PBS_data$waga <- as.numeric(PBS_data$waga)

### Defining labels - needed later

labels <- c("Public administration", "Public order and safety",
            "Education","Pensions","Healthcare", 
            "Social protection","Infrastructure", "Environmental protection")

labels <- as.data.table(labels)

### Treatment

### Actual public spending values

PBS_data[, I4_1_a := 6] 

PBS_data[, I4_2_a := 9 ]

PBS_data[, I4_3_a := 13]

PBS_data[, I4_4_a := 34]

PBS_data[, I4_5_a := 13]

PBS_data[, I4_6_a := 11]

PBS_data[, I4_7_a := 11]

PBS_data[, I4_8_a := 3]

### Distribution error by respondents

PBS_data[, I4_1_e := -abs(I4_1_a - I4_1)]

PBS_data[, I4_2_e := -abs(I4_2_a - I4_2) ]

PBS_data[, I4_3_e := -abs(I4_3_a - I4_3)]

PBS_data[, I4_4_e := -abs(I4_4_a - I4_4)]

PBS_data[, I4_5_e := -abs(I4_5_a - I4_5)]

PBS_data[, I4_6_e := -abs(I4_6_a - I4_6)]

PBS_data[, I4_7_e := -abs(I4_7_a - I4_7)]

PBS_data[, I4_8_e := -abs(I4_8_a - I4_8 )]

### Average error by respondents

PBS_data[, I4_e := (I4_1_e + I4_2_e + I4_3_e + I4_4_e
                    + I4_5_e + I4_6_e + I4_7_e + I4_8_e)/8]

### Control

PBS_data[, P13_1_e := -abs(I4_1_a - P13_1)]

PBS_data[, P13_2_e := -abs(I4_2_a - P13_2) ]

PBS_data[, P13_3_e := -abs(I4_3_a - P13_3)]

PBS_data[, P13_4_e := -abs(I4_4_a - P13_4)]

PBS_data[, P13_5_e := -abs(I4_5_a - P13_5)]

PBS_data[, P13_6_e := -abs(I4_6_a - P13_6)]

PBS_data[, P13_7_e := -abs(I4_7_a - P13_7)]

PBS_data[, P13_8_e := -abs(I4_8_a - P13_8)]

### Average error by respondents

PBS_data[, P13_e := (P13_1_e + P13_2_e + P13_3_e + P13_4_e
                      + P13_5_e + P13_6_e + P13_7_e + P13_8_e)/8]

### Error across experimental groups

PBS_data[,.(mean_treatment = mean(I4_e, na.rm = TRUE), mean_control = mean(P13_e, na.rm = TRUE))]

### Error for both groups

PBS_data[, P13_I4_e := ifelse(is.na(P13_e), I4_e, P13_e)]

### Error for both groups for each category

PBS_data[, I4_1_e := ifelse(is.na(I4_1_e), P13_1_e, I4_1_e)]

PBS_data[, I4_2_e := ifelse(is.na(I4_2_e), P13_2_e, I4_2_e)]

PBS_data[, I4_3_e := ifelse(is.na(I4_3_e), P13_3_e, I4_3_e)]

PBS_data[, I4_4_e := ifelse(is.na(I4_4_e), P13_4_e, I4_4_e)]

PBS_data[, I4_5_e := ifelse(is.na(I4_5_e), P13_5_e, I4_5_e)]

PBS_data[, I4_6_e := ifelse(is.na(I4_6_e), P13_6_e, I4_6_e)]

PBS_data[, I4_7_e := ifelse(is.na(I4_7_e), P13_7_e, I4_7_e)]

PBS_data[, I4_8_e := ifelse(is.na(I4_8_e), P13_8_e, I4_8_e)]

### Perceived distribution for both groups

PBS_data[, I4_1 := ifelse(is.na(I4_1), P13_1, I4_1)]

PBS_data[, I4_2 := ifelse(is.na(I4_2), P13_2, I4_2)]

PBS_data[, I4_3 := ifelse(is.na(I4_3), P13_3, I4_3)]

PBS_data[, I4_4 := ifelse(is.na(I4_4), P13_4, I4_4)]

PBS_data[, I4_5 := ifelse(is.na(I4_5), P13_5, I4_5)]

PBS_data[, I4_6 := ifelse(is.na(I4_6), P13_6, I4_6)]

PBS_data[, I4_7 := ifelse(is.na(I4_7), P13_7, I4_7)]

PBS_data[, I4_8 := ifelse(is.na(I4_8), P13_8, I4_8)]

### Creating variable for age 

PBS_data[, age := 2021 - Rok_urodzenia]

PBS_data[, treatment := ifelse(PBS_data$wariant == "Z interwencja", "1", "0")]

### Establishing survey design 

### Survey design for survey

srs_design <- svydesign(id=~nr_wyw, weights=~waga, data=PBS_data)

### Treatment

srs_treatment <- svydesign(id=~nr_wyw, weights=~waga, data=PBS_data[wariant == 1])

### Control 

srs_control <- svydesign(id=~nr_wyw, weights=~waga, data=PBS_data[wariant == 0])



### I4 ~ distribution of public spending according to respondents 

I4 <- svymean(~I4_1+I4_2+I4_3 + I4_4 + I4_5 + I4_6 + I4_7 + I4_8, srs_design, na = TRUE)

I4 <- as.data.table(I4)



I4[, labels := labels]

### I5 ~ areas for reduction in public spending according to respondents

I5 <- svymean(~I5_1+I5_2 + I5_3 + I5_4 + I5_5 + I5_6 + I5_7 + I5_8, srs_design, na = TRUE)

I5 <- as.data.table(I5)

I5[, odp := rep(c("Nie wskazano", "Wskazano"), length(I5$mean)/2)]

I5 <- I5[odp == 'Wskazano']

I5[, category := labels]

### I6 ~ areas for increase in public spending according to respondents (treatment group)

I6 <- svymean(~I6_1+I6_2+I6_3 + I6_4 + I6_5 + I6_6 + I6_7 + I6_8, srs_design, na = TRUE)

I6 <- as.data.table(I6)

I6[, odp := rep(c("Nie wskazano", "Wskazano"), length(I6$mean)/2)]

I6 <- I6[odp == 'Wskazano']

I6[, category := labels]

I6 <- I6[, .(category, mean, SE)]

I6[, Status := "Treatment"]

### I6 ~ areas for increase in public spending according to respondents (treatment group)

P12 <- svymean(~P12_1+P12_2+P12_3 + P12_4 + P12_5 + P12_6 + P12_7 + P12_8, srs_design, na = TRUE)

P12 <- as.data.table(P12)

P12[, odp := rep(c("Nie wskazano", "Wskazano"), length(P12$mean)/2)]

P12 <- P12[odp == 'Wskazano']

P12[, category := labels]

P12 <- P12[, .(category, mean, SE)]

P12[, Status := "Control"]

### Merge P12 and I6 (control vs treatment)

I_6_P_12 <- rbind(I6, P12)

I_6_P_12 <- I_6_P_12[order(-mean), . (category, mean = mean*100, SE = SE*100, Status)]

### Chart P_6_P_12

I_6_P_12 <- ggplot(I_6_P_12, aes(x = reorder(category, -mean), y=mean, fill = Status)) + 
geom_bar(stat="identity", color="black", 
         position=position_dodge()) +
  geom_errorbar(aes(ymin = mean - SE, 
                    ymax = mean + SE), width=.2,
                position=position_dodge(.9)) +
  theme_light() + scale_fill_manual (values=c( "firebrick", "forestgreen","dodgerblue4", "gold1", "darkgrey")) + 
  theme(axis.text.x = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 20, hjust = .5, vjust = .5, face = "plain"),
        legend.title = element_text(color = "grey20", size = 20, face = "plain"),
        legend.position="bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) +
  labs(color='Status') + labs(x = "Category of spending", y = "Share of respondets (%)", color = "")

ggsave("I_6_P_12.jpeg", device='jpeg', dpi=1000)

### I5 ~ areas for decrease in public spending according to respondents (treatment group)

I5 <- svymean(~I5_1 + I5_2 + I5_3 + I5_4 + I5_3 + I5_5 + I5_6 + I5_7 + I5_8, srs_design, na = TRUE)

I5 <- as.data.table(I5)

I5[, odp := rep(c("Nie wskazano", "Wskazano"), length(I5$mean)/2)]

I5 <- I5[odp == 'Wskazano']

I5[, category := labels]

I5 <- I5[, .(category, mean, SE)]

I5[, Status := "Treatment"]

### P11 ~ areas for decrease in public spending according to respondents (treatment group)

P11 <- svymean(~P11_1+P11_2+P11_3 + P11_4 + P11_5 + P11_6 + P11_7 + P11_8, srs_design, na = TRUE)

P11 <- as.data.table(P11)

P11[, odp := rep(c("Nie wskazano", "Wskazano"), length(P11$mean)/2)]

P11 <- P11[odp == 'Wskazano']

P11[, category := labels]

P11 <- P11[, .(category, mean, SE)]

P11[, Status := "Control"]

### Merge P11 and I5 (control vs treatment)

I_5_P_11 <- rbind(I5, P11)

# Extract mean values for "Control" group
control_means <- I_5_P_11[I_5_P_11$Status == "Control", .(category, mean)]

# Create an ordering factor based on 'control_means'
category_order <- control_means[order(-mean)]$category
I_5_P_11$category <- factor(I_5_P_11$category, levels = category_order)

I_5_P_11 <- ggplot(I_5_P_11, aes(x = category, y = mean*100, fill = Status)) + 
  geom_bar(stat = "identity", color = "black", position = position_dodge()) +
  geom_errorbar(aes(ymin = mean - SE, ymax = mean + SE), width = .2, position = position_dodge(.9)) +
  theme_light() + 
  scale_fill_manual(values = c("firebrick", "forestgreen", "dodgerblue4", "gold1", "darkgrey")) + 
  theme(axis.text.x = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 20, hjust = .5, vjust = .5, face = "plain"),
        legend.title = element_text(color = "grey20", size = 20, face = "plain"),
        legend.position = "bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) +
  labs(x = "Category of spending", y = "Share of respondents (%)", fill = "Treatment Status")

ggsave("I_5_P_11.jpeg", device='jpeg', dpi=900)

### Age and treatment effect
I5_3_age_t <- as.data.table(svyby(~I5_3, ~M2, srs_treatment, svymean, na = TRUE))

I5_3_age_t <- I5_3_age_t[, Status := "Treatment"]

I5_3_age_c <- as.data.table(svyby(~I5_3, ~M2, srs_control, svymean, na = TRUE))

I5_3_age_c <- I5_3_age_c[, Status := "Control"]
  
I5_3_age <- rbind(I5_3_age_t, I5_3_age_c)

I5_3_age$M2 <- factor(I5_3_age$M2, levels = unique(I5_3_age$M2))

 ggplot(I5_3_age, aes(x = M2, y=I5_3, fill = Status)) + 
  geom_bar(stat="identity", color="black", 
           position=position_dodge()) +
  geom_errorbar(aes(ymin = I5_3 - se, 
                    ymax = I5_3 + se), width=.2,
                position=position_dodge(.9)) +
  theme_light() + scale_fill_manual (values=c("dodgerblue4", "gold1", "darkgrey")) + 
  theme(axis.text.x = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 20, hjust = .5, vjust = .5, face = "plain"),
        legend.title = element_text(color = "grey20", size = 20, face = "plain"),
        legend.position="bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) +
  labs(x = "Age", y = "Share of respondets (%)", color='Treatment status')

ggsave("I5_1_age.jpeg", device='jpeg', dpi=900)

### Pl10 ~ indicated share of wasted spending by respondents

svymean(~Pl10, srs_design, na = TRUE)
 
svyby(~Pl10, ~wariant, srs_design, svymean, na = TRUE)

### Share of wasted spending - density function

Pl10 <- ggsurvey(srs_design) +
  aes(Pl10, color = treatment) +
  geom_density(alpha=.5, size=0.8, show_guide=FALSE) +
  labs(color='Treatment status', x = "Indicated share of wasted spending (%)", y = "Density function", color = "") +
  stat_density(aes(x=Pl10, colour=treatment), 
               geom="line",position="identity") +
  theme_light() + scale_colour_manual (values=c("firebrick", "forestgreen", "dodgerblue4")) + 
  theme(axis.text.x = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 20, hjust = .5, vjust = .5, face = "plain"),
        legend.title = element_text(color = "grey20", size = 20, face = "plain"),
        legend.position="bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) +
  scale_x_continuous(breaks = seq(0, 100, by = 10)) 

ggsave("Pl10.jpeg", device='jpeg', dpi=900)

### Cumulative distribution function of absolute negative error 

I4_1_e <- svymean(~I4_1_e + I4_2_e + I4_3_e + I4_4_e + I4_5_e + I4_6_e + I4_7_e + I4_8_e, srs_design, na = TRUE)

I4_1_e <- as.data.table(I4_1_e)

I4_1_e[, category := labels]

I4_1_e <- as.data.table(I4_1_e)

I4_1_e[order(-mean)]

P13_I4_e_mean <- svymean(~P13_I4_e, srs_design, na = TRUE)

P13_I4_e_mean <- as.numeric (P13_I4_e_mean[1])

svyquantile(~P13_I4_e, srs_design, quantile=c(0.30,0.5,0.75, 0.9), ci=TRUE)

P13_I4_e_median <- -7.75

cdf <- ecdf(PBS_data$P13_I4_e)
cdf(-5.354778)

P13_I4_e <- ggsurvey(srs_design) +
  aes(P13_I4_e) +
  labs(x = "Negative absolute error (percentage points)", y = "CDF", color = "") +
  stat_ecdf(size = 1.2, geom = "step") + theme_light() + scale_fill_brewer(palette = "Spectral") + 
  theme(axis.text.x = element_text(color = "grey20", size = 18, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 18, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 18, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 18, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 18, hjust = .5, vjust = .5, face = "plain"),
        legend.position="bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) +
  geom_vline(xintercept = P13_I4_e_mean, linetype="solid", 
             color = "darkred", size=1.6) +
  geom_vline(xintercept = P13_I4_e_median, linetype="solid", 
             color = "darkgreen", size=1.6) +
  geom_vline(xintercept = -5.354778, linetype="dotted", 
             color = "darkred", size=1.6)

ggsave("P13_I4_e.jpeg", device='jpeg', dpi=700)

### Skewness of distribution of an absolute negative error 

skewness(PBS_data$I4_e, na.rm = TRUE) 

### Perceived vs actual level of spending scatterplot

I4_1 <- svymean(~I4_1 + I4_2 + I4_3 + I4_4 + I4_5 + I4_6 + I4_7 + I4_8, srs_design, na = TRUE)

I4_1 <- as.data.table(I4_1)

I4_1[, category := labels]

I4_1[, actual := c(6, 9, 14, 34, 13, 11, 11, 3)]

I4_1 <- I4_1[, .(category, perceived = mean, actual)]

annotation <- data.frame(
  x = c(28, 8),
  y = c(8,26),
  label = c("Less spending than perceived", "More spending than perceived")
)

I4_1_p <- ggplot(I4_1, aes(x=perceived, y=actual)) +
  geom_point() + theme_light() + scale_fill_manual (values=c( "firebrick", "forestgreen","dodgerblue4", "gold1", "darkgrey")) + 
  theme(axis.text.x = element_text(color = "grey20", size = 18, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 18, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 18, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 18, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 18, hjust = .5, vjust = .5, face = "plain"),
        legend.title = element_text(color = "grey20", size = 18, face = "plain"),
        legend.position="bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) + 
  geom_text(data=annotation, aes(x=x, y=y, label=label),                 
            color = "black", 
            size= 4.5, fontface="bold" ) +
  geom_abline(linetype = "dashed", intercept = 0, slope = 1, size = 0.8, colour = "grey80") + 
  labs(x = "Average perceived level of spending (%)", y = "Actual level of spending (%)", color = "") +
  xlim(0, 34) + ylim(0, 34) + 
  geom_label_repel(aes(label = category), size = 5,
  box.padding   = 0.35, 
  point.padding = 0.5,
  nudge_x = 1,
  segment.color = 'grey50') 

ggsave("I4_1_p.jpeg", device='jpeg', dpi=700)

### Treatment effect for increase in spending vs error of prior beliefs

I6_P12_long <- merge(I5, P12, by = "category", suffixes = c(".treatment", ".control"))

I6_P12_long <- I6_P12_long[, .(category, mean.treatment, mean.control)]

I6_P12_long[, treatment_effect := abs(mean.treatment - mean.control)]

I6_P12_long <- I6_P12_long[,.(category, treatment_effect)]

I4_1[, error := -abs(perceived - actual)]

I4_1 <- I4_1[, .(category, error)]

I6_P12_I4_1 <- merge(I6_P12_long, I4_1, by = "category")

I6_P12_I4_1 <- I6_P12_I4_1[order(-treatment_effect)]

### Treatment effect for decrease in spending vs error of prior beliefs

I5_P11_long <- merge(I5, P11, by = "category", suffixes = c(".treatment", ".control"))

I5_P11_long <- I5_P11_long[, .(category, mean.treatment, mean.control)]

I5_P11_long [, treatment_effect := abs(mean.treatment - mean.control)]

I5_P11_long <- I5_P11_long[,.(category, treatment_effect)]

I4_1[, error := -abs(perceived - actual)]

I5_P11_I4_1 <- merge(I5_P11_long, I4_1, by = "category")

I5_P11_I4_1 <- I5_P11_I4_1[order(-treatment_effect)]

### Treatment effects - increase vs decrease 

treatment_comp <- merge(I5_P11_I4_1, I6_P12_I4_1, by = "category", suffixes = c(".decrease", ".increase"))

treatment_comp <- treatment_comp[, .(category, treatment_effect.decrease = treatment_effect.decrease*100, error.decrease = -error.decrease)]

treatment_comp_plot <- ggplot(treatment_comp, aes(x = error.decrease, y = treatment_effect.decrease, label = treatment_comp$category)) +
  geom_point() + 
  geom_smooth(method = "lm", se = FALSE, linetype = "dashed", color = "darkred") +  # Add regression line
  theme_light() + 
  scale_fill_manual(values = c("firebrick", "forestgreen", "dodgerblue4", "gold1", "darkgrey")) + 
  theme(axis.text.x = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        axis.text.y = element_text(color = "grey20", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),  
        axis.title.x = element_text(color = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
        axis.title.y = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
        legend.text = element_text(color = "grey20", size = 20, hjust = .5, vjust = .5, face = "plain"),
        legend.position = "bottom",
        legend.background = element_blank(),
        legend.box.background = element_rect(colour = "grey20")) + 
  labs(x = "Average treatment effect for cuts in public spending (pp)", y = "Prior error of perception (pp)", color = "") +
  xlim(0, 20) + ylim(0, 20) + 
  geom_label_repel(aes(label = category), size = 5,
                   box.padding   = 0.35, 
                   point.padding = 0.5,
                   nudge_x = 1,
                   segment.color = 'grey50') 

ggsave("treatment_comp_plot.jpeg", device='jpeg', dpi=700)


treatment_comp_r <- lm(treatment_effect.decrease ~ error.decrease, 
            data = treatment_comp)

lmtest::coeftest(treatment_comp_r , df = Inf)

treatment_comp[, .(error.decrease, error.increase, category, increase = treatment_effect.increase*100, decrease = treatment_effect.decrease*100)]

treatment_compcor.test(treatment_comp$treatment_effect.increase, treatment_comp$treatment_effect.decrease, method = "pearson")

treatment_comp[,binary := ifelse(treatment_comp$treatment_effect.increase > treatment_comp$treatment_effect.decrease, 1, 0)]


treatment_comp[, sum(binary)]

treatment_comp[, .(mean(treatment_effect.increase)*100, mean(treatment_effect.decrease)*100)]

### Heterogenous perception error 

srs_design <- svydesign(id=~nr_wyw, weights=~waga, data=PBS_data)

### Male

M17_perr <- as.data.table(svyby(~P13_I4_e, ~M17, srs_design, svymean, na = TRUE))

M17_perr[, ind := "Male"]

M17_perr <- M17_perr[, .(M17, ind, P13_I4_e, se)]

### Young

PBS_data[, young := ifelse(age <= 25, 1, 0)]

young_perr <- as.data.table(svyby(~P13_I4_e, ~young, srs_design, svymean, na = TRUE))

young_perr[, ind := "Young"]

young_perr <- young_perr[, .(young, ind, P13_I4_e, se)]

### Teritary education

PBS_data[, educated := ifelse(M3_rekod == 3, 1, 0)]

educated_perr <- as.data.table(svyby(~P13_I4_e, ~educated, srs_design, svymean, na = TRUE))

educated_perr[, ind := "Educated"]

educated_perr <- educated_perr[, .(educated, ind, P13_I4_e, se)]

### Share who indicated as highest spending category 

high_cat <- c(28, 28, 14, 16, 9, 14, 31, 7)

labs <- c("Pensions","Healthcare","Education", "Social protection","Infrastructure", 
"Public order and safety", "Public administration", "Environmental protection")

high_cat <- as.data.table(data)

high_cat[, category := labs]

high_cat$category <- factor(high_cat$category, levels = unique(high_cat$category))

high_cat_p <- ggplot(high_cat, aes(x = category, y = data)) +
  geom_bar(stat = "identity", color = "dodgerblue4", fill = "dodgerblue4",
           position = position_dodge()) +
  theme_light() +
  theme(
    axis.text.x = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
    axis.text.y = element_text(color = "grey20", size = 20, angle = 0, hjust = 1, vjust = 0, face = "plain"),
    axis.title.x = element_text(color = "grey20", size = 20, angle = 0, hjust = .5, vjust = 0, face = "plain"),
    axis.title.y = element_text(color = "grey20", size = 20, angle = 90, hjust = .5, vjust = .5, face = "plain"),
    legend.text = element_text(color = "grey20", size = 20, hjust = .5, vjust = .5, face = "plain"),
    legend.title = element_text(color = "grey20", size = 20, face = "plain"),
    legend.position = "bottom",
    legend.background = element_blank(),
    legend.box.background = element_rect(colour = "grey20")
  ) +
  labs(color = 'Status') + labs(x = "Category of spending", y = "Share of respondents (%)", color = "") +
  geom_text(aes(label = paste0(data, "%"), y = data),
            position = position_dodge(width = 0.9), size = 5, fontface = "bold", vjust = -0.3) +
  ylim(0, 33)

ggsave("high_cat_p.jpeg", device='jpeg', width = 8, height = 8, dpi=1000)

### Average error of individual perception vs error of average perception

error_individual <- svymean(~I4_1_e+I4_2_e+I4_3_e + I4_4_e + I4_5_e + I4_6_e + I4_7_e + I4_8_e, srs_design, na = TRUE)

error_individual <- as.data.table(error_individual)

error_individual[, category := labels]

error_individual <- error_individual[, .(error_individual = mean, category)]

error_average <- I4_1[, .(category, error_average = error)]

error_average_individual <- merge(error_average, error_individual, by = c("category"))

error_average_individual[, mean(error_average)]
